Better data than mine:
Load and merge:
url = "https://github.com/favstats/USElection2020-EdisonResearch-Results/raw/main/data/latest/presidential.csv"
results2020 = read.csv(url)
megafile = read.table("eday-covid.txt", header = TRUE)
library(dplyr)
results = left_join(results2020, megafile, by = "fips")
Add swings:
results$Swing2020 = results$margin2020 - results$margin2016
results$Swing2016 = results$margin2016 - results$margin2012
Easy map:
# Needs a fips variable
library(usmap)
plot_usmap(regions = "counties", include = "IN",
data = results, values = "margin2020") +
scale_fill_gradient2(low = "darkblue", mid = "darkorchid",
high = "red", name = "Trump margin") +
theme(legend.position = "right")
County map:
us_states = map_data("state")
county_map$id = as.numeric(county_map$id)
# joining with election results
county.election.df = left_join(county_map, results, by = c("id" = "fips"))
#county.election.df = drop_na(county.election.df)
ggplot(county.election.df, aes(x = long, y = lat, fill = margin2020, group = group)) + geom_polygon(color = "gray90", size = 0.25) + theme_map() + labs(fill = "Trump margin") + scale_fill_gradient2(low = "blue", mid = "darkorchid", high = "red")
Nerf the color scale:
nerf = county.election.df
nerf$margin2020[nerf$margin2020 > 25] = 25
nerf$margin2020[nerf$margin2020 < -25] = -25
ggplot(nerf, aes(x = long, y = lat, fill = margin2020, group = group)) + geom_polygon(color = "gray90", size = 0.25) + theme_map() + labs(fill = "Trump margin") + scale_fill_gradient2(low = "blue", mid = "darkorchid", high = "red")
nerfswing = county.election.df
nerfswing$Swing2020[nerfswing$Swing2020 > 10] = 10
nerfswing$Swing2020[nerfswing$Swing2020 < -10] = -10
ggplot(nerfswing, aes(x = long, y = lat, fill = Swing2020, group = group)) + geom_polygon(color = "gray90", size = 0.25) + theme_map() + scale_fill_gradient2(low = "blue", mid = "darkorchid", high = "red")
2020 swing vs. 2016 swing:
results %>%
ggplot(aes(Swing2016, Swing2020)) +
geom_smooth(method = "lm") +
geom_point()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).
#geom_point(data = filter(compresults, votes >= 50000))
2020 swing vs. college education:
results %>%
ggplot(aes(college, Swing2020)) + geom_smooth(se = FALSE) +
geom_point(alpha = 0.1) +
scale_x_log10() +
xlab("Percent with a college degree (log scale)") +
ylab("Swing (positive means Trump did better in 2020)") +
ggtitle("Education polarization increased again in 2020") +
labs(subtitle = "Trump improved in low education counties, did worse in high education counties")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 61 rows containing non-finite values (stat_smooth).
## Warning: Removed 61 rows containing missing values (geom_point).
Split by region:
results %>%
filter(census_region != "NA") %>%
ggplot(aes(college, Swing2020)) + geom_smooth(se = FALSE) +
geom_point(alpha = 0.1) +
scale_x_log10() +
xlab("Percent with a college degree (log scale)") +
ylab("Swing (positive means Trump did better in 2020)") +
ggtitle("Education polarization increased again in 2020") +
labs(subtitle = "Trump improved in low education counties, did worse in high education counties") +
facet_wrap(~census_region)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
How does this compare to 2016?
results %>%
ggplot(aes(college, Swing2016)) + geom_smooth(se = FALSE) +
geom_point(alpha = 0.3) +
scale_x_log10()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 61 rows containing non-finite values (stat_smooth).
## Warning: Removed 61 rows containing missing values (geom_point).
Swing by Hispanic percentage:
results$stateA = recode_factor(results$state.x, Florida = "Florida", Texas = "Texas", .default = "Everywhere else")
results %>%
filter(stateA != "NA") %>%
ggplot(aes(hisp_pct * 100, Swing2020)) + geom_point(alpha = 0.3) +
geom_smooth(se = FALSE) +
# scale_x_log10() +
xlim(10, 100) +
facet_wrap(~ stateA) +
xlab("Hispanic percentage") +
ylab("Swing (positive means Trump did better in 2020)") +
ggtitle("Swing in counties with at least 10% Hispanic population")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2328 rows containing non-finite values (stat_smooth).
## Warning: Removed 2328 rows containing missing values (geom_point).
Swing by COVID deaths per capita:
results %>%
ggplot(aes(deaths/popestimate2019, Swing2020)) +
geom_point() +
#geom_smooth(method = "lm") +
geom_smooth(method = "gam", color = "orange") +
scale_x_log10()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 351 rows containing non-finite values (stat_smooth).
## Warning: Removed 61 rows containing missing values (geom_point).
Was 538 right?
display(lm(margin2020 ~ margin2012 + margin2016, data = results))
## lm(formula = margin2020 ~ margin2012 + margin2016, data = results)
## coef.est coef.se
## (Intercept) -1.51 0.15
## margin2012 -0.10 0.01
## margin2016 1.12 0.01
## ---
## n = 3110, k = 3
## residual sd = 5.19, R-Squared = 0.97
What’s the correlation of swing with (log) college education?
cor(log(results$college), results$Swing2020, use = "pairwise")
## [1] -0.5634693
display(lm(Swing2020 ~ log(college), data = results))
## lm(formula = Swing2020 ~ log(college), data = results)
## coef.est coef.se
## (Intercept) -12.11 0.33
## log(college) -7.59 0.20
## ---
## n = 3098, k = 2
## residual sd = 4.42, R-Squared = 0.32
Is swing related to race? (Note that since the percentages are small and the data is aggregated, this may be misleading.)
display(lm(Swing2020 ~ black_pct + hisp_pct + asian_pct, data = results))
## lm(formula = Swing2020 ~ black_pct + hisp_pct + asian_pct, data = results)
## coef.est coef.se
## (Intercept) -0.17 0.13
## black_pct 1.03 0.63
## hisp_pct 10.95 0.66
## asian_pct -57.06 3.46
## ---
## n = 3098, k = 4
## residual sd = 4.98, R-Squared = 0.13
Does the Hispanic result hold in e.g. Illinois?
results %>%
filter(state.x == "Illinois") %>%
ggplot(aes(hisp_pct, Swing2020)) + geom_text(aes(label = county))
## Warning: Removed 1 rows containing missing values (geom_text).
Is there a relationship with COVID rate?
display(lm(Swing2020 ~ log((cases+1)/popestimate2019), data = results))
## lm(formula = Swing2020 ~ log((cases + 1)/popestimate2019), data = results)
## coef.est coef.se
## (Intercept) 5.95 0.52
## log((cases + 1)/popestimate2019) 1.55 0.14
## ---
## n = 3098, k = 2
## residual sd = 5.25, R-Squared = 0.04
Is there a relationship with COVID rate after accounting for college education?
display(lm(Swing2020 ~ log((deaths+1)/popestimate2019) + log(college), data = results))
## lm(formula = Swing2020 ~ log((deaths + 1)/popestimate2019) +
## log(college), data = results)
## coef.est coef.se
## (Intercept) -8.82 0.82
## log((deaths + 1)/popestimate2019) 0.39 0.09
## log(college) -7.39 0.20
## ---
## n = 3098, k = 3
## residual sd = 4.41, R-Squared = 0.32